capture log close
clear all
set more off
pause off

local path ""
local log ""
local data ""

clear all
use "`data'\Form_5500_data.dta" 

rename YEAR year, replace

* Does plan have retirees on it? (7b)
count if RTD_SEP_PARTCP_RCVG_CNT==.
gen retiree_benefit_ind = 0 if RTD_SEP_PARTCP_RCVG_CNT==0
replace retiree_benefit_ind = 1 if RTD_SEP_PARTCP_RCVG_CNT>0&RTD_SEP_PARTCP_RCVG_CNT<.

* values for TYPE_PENSION_BENEFIT_IND (item 6b)
gen DB_ind = 0
replace DB_ind = 1 if TYPE_PENSION_BENEFIT_IND==1

gen DC_ind = 0
replace DC_ind = 1 if TYPE_PENSION_BENEFIT_IND==2|TYPE_PENSION_BENEFIT_IND==4| ///
TYPE_PENSION_BENEFIT_IND==5|TYPE_PENSION_BENEFIT_IND==6

gen any_plan_ind = DB_ind + DC_ind


* Does plan relate to collective bargaining (13a)

gen collective_ind = 0
replace collective_ind = 1 if COLLECTIVE_BARGAIN_IND==1

* Employer EIN (1b)
rename SPONS_DFE_EIN EIN, replace
cap drop temp
rename EIN temp, replace
gen EIN  = string(temp,"%09.0f")
drop temp

* Business code (1d), could be thought of as industry
rename BUSINESS_CODE industry, replace

* Employer size - employees (22h/22b)
rename QUAL_TOT_NUM_EMPLOYEES_CNT num_employee, replace

* Number of emplyees qualified for plan (22j) - could be thought of as plan size?
rename QUAL_TOT_EMP_NOT_EXCLD_CNT num_qual_employee, replace
cap drop temp
gen temp = num_employee-num_qual_employee
gen temp2=temp/num_employee
replace num_qual_employee = . if temp2<0
drop temp*

* ADMIN_EIN (2b) - could be different from employer
rename ADMIN_EIN admin_EIN, replace

* Financial variables of PLAN

* Total assets (average beginning and end of year)
gen mean_asset = (TOT_ASSETS_BOY_AMT + TOT_ASSETS_EOY_AMT)/2
* Total liabilities (average beginning and end of year)
gen mean_debt = (TOT_LIABILITIES_BOY_AMT + TOT_LIABILITIES_EOY_AMT)/2
* Net assets
gen mean_net_asset = (NET_ASSETS_BOY_AMT + NET_ASSETS_EOY_AMT)/2

* Income flows within the plan
* Contribution amounts
* By employer
rename EMPLR_CONTRIB_INCOME_AMT employer_contribution, replace
* Any
rename TOT_CONTRIB_AMT any_contribution, replace

* Returns - could be indicative of how well-managed
* Returns on investments
cap drop retur*
gen returns = TOTAL_INTEREST_AMT + GAIN_LOSS_REG_INVST_AMT + TOTAL_DIVIDENDS_AMT + ///
TOTAL_RENTS_AMT             
* profitability
cap drop profit*
gen profitability = returns / TOT_ASSETS_BOY_AMT

* Flows
rename TOT_INCOME_AMT tot_income, replace
rename TOT_EXPENSES_AMT tot_exp, replace
rename NET_INCOME_AMT net_income, replace

* Growth (net assets are positive for almost all plans)
gen growth = net_income/NET_ASSETS_BOY_AMT
replace growth = . if NET_ASSETS_BOY_AMT<0

* Trim at 1% and 99%
sum growth, d
replace growth = . if growth<=-.9996262 | growth>=2.515386

* Was plan terminated or about to be terminated?
replace PLAN_TERM_YEAR = . if PLAN_TERM_YEAR==219|PLAN_TERM_YEAR==1
replace PLAN_TERM_YEAR = PLAN_TERM_YEAR+1900 if PLAN_TERM_YEAR<100
gen temp = PLAN_TERM_YEAR-year

cap drop terminated
gen terminated = 0
replace terminated = 1 if PLAN_TERM_YEAR!=.

cap drop temp
rename year temp, replace
gen year  = string(temp,"%04.0f")
drop temp

keep EIN year retiree_benefit_ind DB_ind DC_ind ///
collective_ind num_employee num_qual_employee ///
mean_net_asset employer_contribution any_contribution ///
profitability growth terminated

/* Collapse by EIN over years (1990-1998) */
drop year  
collapse (max) retiree_benefit_ind (max) DB_ind (max) DC_ind ///
(max) collective_ind (mean) num_employee (mean) num_qual_employee ///
(mean) mean_net_asset (mean) employer_contribution (mean) any_contribution ///
(mean) profitability (mean) growth (max) terminated, by(EIN)

save "`data'\EIN_collapsed.dta", replace
